Metrics over replicates for diseases with too many false iterations

Based on the new replicates of the mappings of RCTs across diseases that had too many false iterations, we will compute for each disease separately, and for each replicate:

  • Number of trials per region relevant to the disease
  • Number of trials per region relevant to the burden of diseases
  • Number of trials worldwide and in non-high-income countries relevant to the disease and to the burden
  • The same metrics for the number of patients

In [1]:
# Disease identifiers to process: the entries found in the additional
# replicates folder, converted to numbers.
dis <- as.numeric(
  list.files(
    path = "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/"
  )
)
dis


  1. 23
  2. 9

In [2]:
library(data.table)
library(foreach)
library(doParallel)
options(warn = 2)

# Load the RCT database
data <- read.table(
  file = "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/database_RCTs_regions_27diseases.txt"
)

# Load the table mapping disease-category indices to their names/labels
Mgbd <- read.table(file = "../Data/27_gbd_groups.txt")


Loading required package: iterators
Loading required package: parallel

In [3]:
# Regions per trial ----
# `data$Regions` holds, for each trial, its regions separated by "&".
# `regs` is the sorted vector of distinct region names.
regs <- sort(unique(unlist(strsplit(as.character(data$Regions), "&"))))

# One logical column per region: TRUE when the region name appears in the
# trial's region string. `fixed = TRUE` makes the match literal so that a
# region name is never interpreted as a regular expression.
LR <- lapply(regs, function(x) {
  grepl(x, as.character(data$Regions), fixed = TRUE)
})
LR <- data.table(do.call("cbind", LR))
LR$TrialID <- data$TrialID

# Number of patients per region per trial ----
# Suppress sample sizes below 10 and above 200k
data$Sample[data$Sample < 10 | data$Sample > 200000] <- NA

# Number of countries per region per trial, used to distribute the sample
# size equally across countries
nb_ctrs <- lapply(strsplit(as.character(data$Nb_ctr_per_reg), "&"), as.numeric)
RGs <- strsplit(as.character(data$Regions), "&")

# Number of regions listed for each trial; both "&"-separated fields must
# describe the same regions, otherwise rows below would be misaligned.
n_regs <- lengths(nb_ctrs)
stopifnot(identical(lengths(RGs), n_regs))

# Long format: one row per (trial, region).
# Region is an explicit factor with levels `regs` so that per-region
# tabulations keep zero-count regions whatever the R version (since R 4.0
# data.frame() no longer converts strings to factors by default).
pats <- data.frame(TrialID = rep(data$TrialID, n_regs),
                   Nb_ctrs = unlist(nb_ctrs),
                   Region = factor(unlist(RGs), levels = regs),
                   Tot_sample = rep(data$Sample, n_regs))

# Total number of countries of the trial, repeated on each of its rows
pats$tot_ctrs <- rep(vapply(nb_ctrs, sum, numeric(1)), n_regs)
# Share of the trial's sample attributed to the region, proportional to the
# number of countries of the trial located in that region
pats$sample_per_reg <- pats$Tot_sample * pats$Nb_ctrs / pats$tot_ctrs
pats <- data.table(pats)
setkey(pats, TrialID)

In [4]:
# For each disease in `dis`, read every replicate of the reclassification,
# compute the number of RCTs and patients per region, worldwide ("All") and
# outside the "High-income" region ("Non-HI"), both for the disease itself
# ("dis") and for all reclassified trials ("all"), then write the stacked
# per-replicate tables to one file per disease.
t0 <- proc.time()

for (d in dis) {

  tp0 <- proc.time()
  # Elements are collapsed without separator, like the other status messages
  print(paste(c("starting disease ", d, ": ", as.character(Mgbd$x[d])), collapse = ""))

  # Folder holding the replicate files of disease d
  repl_dir <- file.path(
    "/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add",
    as.character(d)
  )
  SMs <- list.files(repl_dir)
  SMs <- grep("Reclassif", SMs, value = TRUE)
  # Diseases with fewer than 9000 replicate files are skipped
  if (length(SMs) < 9000) {
    print(paste(c("disease ", d, ": ", as.character(Mgbd$x[d]), " has only ", length(SMs), " replicates: we pass to next one"), collapse = ""))
    next
  }

  cl <- makeCluster(4)
  registerDoParallel(cl)

  # The cluster is stopped even if one replicate fails
  A <- tryCatch(
    foreach(k = SMs, .packages = "data.table") %dopar% {

      repl <- fread(file.path(repl_dir, k))
      # Trial identifiers are attached by position, so the replicate must
      # have exactly one row per trial of the database
      stopifnot(nrow(repl) == length(LR$TrialID))
      repl$TrialID <- LR$TrialID
      setkey(repl, TrialID)
      replpats <- merge(pats, repl)
      setkey(replpats, Region)

      # Counts per region in the order of `regs`, keeping regions with no
      # trial as 0 so the result always has length(regs) entries
      count_by_region <- function(x) {
        as.integer(table(factor(x, levels = regs)))
      }

      # Output data: one row per (Region, Dis) combination
      df <- data.table(Region = c(sort(regs), "All", "Non-HI"),
                       Dis = rep(c("dis", "all"), each = 9),
                       RCTs = as.integer(0),
                       Patients = as.numeric(0))

      # Per region
      # Number of trials per region relevant to the disease and to the burden
      df[Dis == "dis" & Region %in% regs,
         RCTs := count_by_region(replpats[recl_dis == 1, Region])]
      df[Dis == "all" & Region %in% regs,
         RCTs := count_by_region(replpats[recl_dis + recl_oth >= 1, Region])]
      # Number of patients per region relevant to the disease and to the burden
      df[Dis == "dis" & Region %in% regs,
         Patients := replpats[recl_dis == 1, ][regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1]
      df[Dis == "all" & Region %in% regs,
         Patients := replpats[recl_dis + recl_oth >= 1, ][regs, sum(sample_per_reg, na.rm = TRUE), by = .EACHI]$V1]

      # Worldwide
      # Number of trials worldwide relevant to the disease and to the burden
      df[Dis == "dis" & Region == "All", RCTs := sum(repl$recl_dis)]
      df[Dis == "all" & Region == "All", RCTs := sum(repl$recl_dis + repl$recl_oth >= 1)]
      # Number of patients worldwide relevant to the disease and to the burden
      df[Dis == "dis" & Region == "All",
         Patients := sum(replpats[recl_dis == 1, sample_per_reg], na.rm = TRUE)]
      df[Dis == "all" & Region == "All",
         Patients := sum(replpats[recl_dis + recl_oth >= 1, sample_per_reg], na.rm = TRUE)]

      # Non-high-income regions
      # Number of distinct trials outside "High-income" relevant to the
      # disease and to the burden
      df[Dis == "dis" & Region == "Non-HI",
         RCTs := replpats[Region != "High-income", ][recl_dis == 1, ][!duplicated(TrialID), .N]]
      df[Dis == "all" & Region == "Non-HI",
         RCTs := replpats[Region != "High-income", ][recl_dis + recl_oth >= 1, ][!duplicated(TrialID), .N]]
      # Number of patients outside "High-income" relevant to the disease and
      # to the burden
      df[Dis == "dis" & Region == "Non-HI",
         Patients := sum(replpats[Region != "High-income", ][recl_dis == 1, sample_per_reg], na.rm = TRUE)]
      df[Dis == "all" & Region == "Non-HI",
         Patients := sum(replpats[Region != "High-income", ][recl_dis + recl_oth >= 1, sample_per_reg], na.rm = TRUE)]

      # Value of the iteration: the filled table for this replicate
      df
    },
    finally = stopCluster(cl)
  )

  # Stack the per-replicate tables and write them to the disease's file
  fwrite(rbindlist(A), paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_", as.character(d), ".txt"))
  rm(A)

  tp1 <- proc.time()
  print(paste(c("disease ", d, ": ", as.character(Mgbd$x[d]), " finished after (min):"), collapse = ""))
  print((tp1 - tp0) / 60)
}

t1 <- proc.time()

print("total time (hrs):")
print((t1 - t0) / 3600)


[1] "starting disease  23 :  Congenital anomalies"
[1] "disease 23: Congenital anomalies finished after (min):"
        user       system      elapsed 
 0.150583333  0.009166667 21.880166667 
[1] "starting disease  9 :  Sexually transmitted diseases excluding HIV"
[1] "disease 9: Sexually transmitted diseases excluding HIV finished after (min):"
        user       system      elapsed 
 0.158150000  0.008466667 21.421450000 
[1] "total time (hrs):"
        user       system      elapsed 
0.0051516667 0.0002938889 0.7218480556 

In [ ]: